### Build Index

import os
import warnings

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_openai import OpenAIEmbeddings
# Non-secret LangSmith settings: tracing flag and the project name to log under.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "playground"

# SECURITY: API keys must never be committed to source control. The keys that
# were previously hard-coded here should be treated as leaked and revoked.
# Provide fresh ones through the process environment (shell export, .env
# loader, secret manager) before running this script.
_missing_keys = [
    name
    for name in ("LANGCHAIN_API_KEY", "GOOGLE_API_KEY")
    if not os.environ.get(name)
]
if _missing_keys:
    # Warn instead of raising: the client libraries may still find credentials
    # another way, and they report their own errors if they cannot.
    warnings.warn(
        "Missing API key environment variable(s): "
        + ", ".join(_missing_keys)
        + "; tracing and/or embedding calls may fail."
    )

# Embedding model used to vectorize the document chunks
embd = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Web pages to ingest into the index
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Fetch every page. `docs` keeps one list of documents per URL, while
# `docs_list` is the same content flattened into a single list.
docs = []
docs_list = []
for url in urls:
    loaded = WebBaseLoader(url).load()
    docs.append(loaded)
    docs_list.extend(loaded)

# Chunk the documents: 500 tokens per chunk (tiktoken-based length), no overlap
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=0,
)
doc_splits = text_splitter.split_documents(docs_list)

# Embed the chunks and store them in a Chroma collection persisted on disk.
# `documents` is required by `from_documents`; without it the call raises
# TypeError and nothing is indexed.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embd,
    persist_directory="F:/tmp/demo/chroma_db",
)

# Smoke-test the index. `VectorStore.search` requires an explicit
# `search_type`, so call the similarity search directly.
result = vectorstore.similarity_search("What is the meaning of life?")
print(result)